* This script contains the code to replicate Table 2, Table 3 and Table 4 in Ductor, L., Fafchamps, M., Goyal S. and M. van der Leij. Social Networks and Research Output. The Review of Economics and Statistics.

/* Session setup: start from an empty workspace, open the log and load the
   estimation data set. */
clear all
/* Do not pause the run with --more-- prompts while output scrolls by. */
set more off
/* A log left open by a previous (possibly aborted) run would make
   -log using- fail, so close it first; -capture- ignores the error raised
   when no log is open. */
capture log close
log using Table2-4.log, replace
use prodndnp_fullsample.dta

/*******OLS REGRESSIONS**************/

/* OLS out-of-sample program

   ols_os depvar indepvars [if]

   Fits a pooled OLS regression (standard errors clustered on -auth-) on the
   observations selected by the -if- qualifier, then evaluates the fitted
   model on the observations with group==2 and a non-missing dependent
   variable.

   Left behind for the caller:
     variable sqerrols   squared prediction error, defined on the evaluated
                         group==2 observations only
     scalar n1           number of observations used in the regression
     scalar rmseols      in-sample RMSE, e(rmse)
     scalar r2i1ols      in-sample R-squared, e(r2)
     scalar rssols       sum of squared prediction errors
     scalar nols         number of evaluated observations
     scalar rmseo2ols    out-of-sample RMSE = sqrt(rssols/nols)

   The data must contain the variables -auth- and -group-. */
cap pro drop ols_os
program define ols_os
    syntax varlist(numeric min=1) [if]
    marksample touse

    /* The first variable of the list is the dependent variable; use it
       instead of a hard-coded name when building the prediction errors. */
    gettoken depvar : varlist

    /* Scratch variables are temporary, so they cannot collide with data
       variables and are removed automatically, even if the program aborts. */
    tempvar yhat rsscol

    /* A leftover sqerrols from an earlier call would make -generate- fail. */
    capture drop sqerrols

    reg `varlist' if `touse', vce(cluster auth) /* Estimating the model */
    scalar n1 = e(N)         /* number of observations in the regression */
    scalar rmseols = e(rmse) /* in-sample RMSE */
    scalar r2i1ols = e(r2)   /* in-sample R-squared */

    /* Linear prediction for the hold-out observations (group==2). */
    predict `yhat' if group==2 & `depvar'<., xb
    qui gen sqerrols = (`depvar'-`yhat')^2 if `yhat'<. /* squared prediction errors */

    /* Sum of squared prediction errors: -egen- writes the same total on
       every evaluated row, so r(max) is that total and r(N) is the number
       of evaluated observations. */
    qui egen `rsscol' = sum(sqerrols) if `yhat'<.
    qui sum `rsscol'
    scalar rssols = r(max)
    scalar nols = r(N)

    /* scalar() forces the scalar to be used even if the data hold a
       variable whose name (or abbreviation) matches the scalar's name. */
    scalar rmseo2ols = (scalar(rssols)/scalar(nols))^0.5 /* RMSE out of sample */

    di scalar(n1) " Number of observations in the in-sample group"
    di scalar(rmseo2ols) "  Pooled OLS out-of-sample RMSE"
    di scalar(r2i1ols) "R-squared"
    di scalar(nols) " Number of observations in the out-of-sample group"
end


/* MODEL 0: benchmark regression estimated on group 1 and evaluated on the
   group 2 observations with a non-missing lprodf3. The squared prediction
   errors are kept in sqerrolsM0 because the later Diebold-Mariano
   comparisons subtract them from each competing model's errors. */

regress lprodf3 y7-y27 nopapers t7-t27 lcprodl5 if group==1
generate e = 1 if e(sample)   /* flags the Model 0 estimation sample */

/* In-sample statistics of the fit */
scalar n1 = e(N)
scalar rmseols = e(rmse)
scalar r2i1ols = e(r2)
estat ic

/* Predictions and squared errors on the hold-out group */
predict ybols if group==2 & lprodf3<., xb
quietly {
    generate sqerrolsM0 = (lprodf3 - ybols)^2 if ybols < .
    /* peols holds the same total on every evaluated row, so r(max) is the
       sum of squared errors and r(N) the number of evaluated rows */
    egen peols = sum(sqerrolsM0) if ybols < .
    summarize peols
    scalar rssols = r(max)
    scalar nols = r(N)
}
scalar rmseo2ols = (rssols/nols)^0.5   /* out-of-sample RMSE */

display n1 " Number of observations in the in-sample group"
display rmseo2ols "  Pooled OLS out-of-sample RMSE"
display r2i1ols "R-squared Model 0"
display nols " Number of observations in the out-of-sample group"
drop peols ybols


/* MODEL 1: Model 0 plus lcprod5, estimated on group 1 and evaluated on the
   group 2 observations with a non-missing lprodf3. */

regress lprodf3 y7-y27 nopapers t7-t27 lcprodl5 lcprod5 if group==1

/* In-sample statistics of the fit */
scalar n1 = e(N)
scalar rmseols = e(rmse)
scalar r2i1ols = e(r2)
estat ic

/* Predictions and squared errors on the hold-out group */
predict ybols if group==2 & lprodf3<., xb
quietly {
    generate sqerrolsM1 = (lprodf3 - ybols)^2 if ybols < .
    /* peols holds the same total on every evaluated row, so r(max) is the
       sum of squared errors and r(N) the number of evaluated rows */
    egen peols = sum(sqerrolsM1) if ybols < .
    summarize peols
    scalar rssols = r(max)
    scalar nols = r(N)
}
scalar rmseo2ols = (rssols/nols)^0.5   /* out-of-sample RMSE */

display n1 " Number of observations in the in-sample group"
display rmseo2ols "  Pooled OLS out-of-sample RMSE"
display r2i1ols "R-squared Model 1"
display nols " Number of observations in the out-of-sample group"
drop peols ybols

/* Diebold-Mariano test: regress the difference in squared prediction errors
   (Model 1 minus Model 0) on a constant, using newey2.
   NOTE(review): this test uses lag(12) while every other Diebold-Mariano
   test in this file uses lag(5) -- confirm that this is intended. */
generate diffsqerrlM1 = sqerrolsM1 - sqerrolsM0
newey2 diffsqerrlM1 if group==2 & lprodf3<., lag(12)
drop diffsqerrlM1



/* TABLE 2. Prediction accuracy of the restricted Models 1 and 2 */

/* MODEL 2: one regression per row of Table 2. Each pass adds the listed
   network variable(s) to the Model 0 regressors. Rows, in order:
     coauthors' productivity               lnetprod5y
     coauthors' coauthors' productivity    lnetprod25y
     degree                                degree5y
     degree of order 2                     degree25y
     giant component                       gc5y
     betweenness                           gc5y lbet5y
     closeness                             gc5y lclos5y
     working with a top 1%                 neiq1fs5y */
foreach netvars in "lnetprod5y" "lnetprod25y" "degree5y" "degree25y" ///
                   "gc5y" "gc5y lbet5y" "gc5y lclos5y" "neiq1fs5y" {

    ols_os lprodf3 y7-y27 nopapers t7-t27 lcprodl5 `netvars' if group==1

    /* Diebold-Mariano test: difference in squared prediction errors
       relative to Model 0, regressed on a constant with newey2 */
    generate diffsqerrlM2 = sqerrols - sqerrolsM0
    newey2 diffsqerrlM2 if group==2 & lprodf3<., lag(5)

    /* sqerrols has to go before the next ols_os call recreates it */
    drop diffsqerrlM2 sqerrols
}

/* TABLE 3. MODEL 3: one regression per row of Table 3. Each pass adds the
   listed network variable(s) to the Model 1 regressors (Model 0 regressors
   plus lcprod5). Rows, in order:
     coauthors' productivity               lnetprod5y
     coauthors' coauthors' productivity    lnetprod25y
     degree                                degree5y
     degree of order 2                     degree25y
     giant component                       gc5y
     betweenness                           gc5y lbet5y
     closeness                             gc5y lclos5y
     coauthors' top dummy variable         neiq1fs5y

   Every row is estimated on group==1. The last row used to be estimated with
   "if e(sample)"; at that point the most recent estimation command is the
   newey2 call of the previous row, which runs on group==2, so e(sample)
   selected the hold-out observations and the model was fitted on the very
   group it is then asked to predict. It now uses group==1 like every other
   row. */
foreach netvars in "lnetprod5y" "lnetprod25y" "degree5y" "degree25y" ///
                   "gc5y" "gc5y lbet5y" "gc5y lclos5y" "neiq1fs5y" {

    ols_os lprodf3 y7-y27 nopapers t7-t27 lcprodl5 lcprod5 `netvars' if group==1

    /* Diebold-Mariano test: difference in squared prediction errors
       relative to Model 0, regressed on a constant with newey2 */
    gen diffsqerrlM3 = sqerrols - sqerrolsM0
    newey2 diffsqerrlM3 if group==2 & lprodf3<., lag(5)

    /* sqerrols has to go before the next ols_os call recreates it */
    drop diffsqerrlM3 sqerrols
}

/*******Table 4*******/

/* Network variables that enter the multivariate models together. */
local netall lnetprod5y lnetprod25y degree5y degree25y gc5y lbet5y lclos5y neiq1fs5y

/*******Multivariate Model 3***********/
/* Model 1 regressors plus all network variables. */

ols_os lprodf3 y7-y27 nopapers t7-t27 lcprodl5 lcprod5 `netall' if group==1
/* Diebold-Mariano test: difference in squared prediction errors relative to
   Model 0, regressed on a constant with newey2 */
gen diffsqerrlM3 = sqerrols - sqerrolsM0
newey2 diffsqerrlM3 if group==2 & lprodf3<., lag(5)
drop diffsqerrlM3 sqerrols


/*******Multivariate Model 2***********/
/* Model 0 regressors plus all network variables.
   This call used to be restricted with "if e(sample)". The most recent
   estimation command at this point is the newey2 call just above, which
   runs on group==2, so e(sample) selected the hold-out observations rather
   than the estimation group. It now uses group==1 like the other models. */

ols_os lprodf3 y7-y27 nopapers t7-t27 lcprodl5 `netall' if group==1
/* Diebold-Mariano test: difference in squared prediction errors relative to
   Model 0, regressed on a constant with newey2 */
gen diffsqerrlM2 = sqerrols - sqerrolsM0
newey2 diffsqerrlM2 if group==2 & lprodf3<., lag(5)
drop diffsqerrlM2 sqerrols


log close

